from pyspark.context import SparkContext

sc = SparkContext(master='local', appName='accumulator')

# Each line of students.txt is expected to be a comma-separated student
# record whose last field is the class — TODO confirm against the data file.
students_rdd = sc.textFile('../../data/students.txt')
# An accumulator lets tasks running on executors contribute to a shared
# running total that is read back on the driver.
# Step 1: define the accumulator on the driver side, starting at 0.
acc = sc.accumulator(0)


def count_func(line):
    """Extract the class (the last comma-separated field) of one student record.

    Side effect: increments the driver-defined accumulator ``acc`` by one for
    every record processed, so the driver can count processed lines.
    """
    acc.add(1)
    *_, clazz = line.split(',')
    return clazz


# Step 2 happens inside count_func: each executor task calls acc.add(1)
# once per record while mapping a line to its class field.
clazz_rdd = students_rdd.map(count_func)

# foreach is an action, so it triggers the job that actually runs count_func
# (and therefore the accumulator updates) on the executors.
clazz_rdd.foreach(print)

# Step 3: read the accumulated value back on the driver.
# NOTE(review): accumulator updates made inside a transformation (map) are
# not guaranteed exactly-once — if clazz_rdd were re-evaluated by another
# action, acc would be incremented again. Spark recommends updating
# accumulators inside actions for exact counts; fine here since the RDD is
# only acted on once — confirm if more actions are added.
count = acc.value
print(f"count:{count}")

